#Author: Carolina Moehlecke
#Paper: The Chilling Effect of International Investment Disputes: Limited Challenges to State Sovereignty
#Version: ISQ final submission
#Purpose: Replication data for all tests, figures and appendix 
#Created: 06/13/2018
#Last edited: 08/14/2019

#####PREPARATION#####

#clean up environment
rm(list = ls(all = TRUE))

#set up directory
#setwd("Your working directory here")
setwd("C:/Users/Carolina/OneDrive/Documentos/@A - UT Austin/1 - Fall 2017/Chilling Effect for Journal Submission - Next/ISQ/Final Manuscript for ISQ")

#load packages
library(foreign)
library(eha)
library(ggplot2)
library(gridExtra)
library(gplots)
library(flexsurv)
library(data.table)
library(stargazer)
library(xtable)
library(extrafont)
library(ggthemes)
library(survival)
library(RCurl)

#####ESTIMATION OF COEFFICIENTS FOR SPEEDS OF DIFFUSION#####

#Purpose: estimation of coefficients for speeds of diffusion of anti-smoking policies, as reported on figures 1 and 2.

#Note: this section of the script was replicated and adapted from Mallinson, Daniel J. 2015. "Building a Better Speed Trap: Measuring Policy
## Adoption Speed in the American States." State Politics & Policy Quarterly 16 (1): 98-120.

#Load policy adoption dataset
finaldata <- read.csv("paper_speedata.csv")
data <- finaldata

#Prepare for performing for loops
max <- length(data)
sequence <- seq(4,max,3)

#Pull out the first year of adoption for each policy
for(i in sequence){
  policy <- data.frame(data[c(i)])
  policy.min <- min(policy, na.rm=TRUE)
  if(i==4){
    firstadopt <- policy.min
  }
  else{
    firstadopt <- rbind(firstadopt, policy.min)
  }
}

#Pull out the last year of adoption for each policy
for(i in sequence){
  policy <- data.frame(data[c(i)])
  policy.max <- max(policy, na.rm=TRUE)
  if(i==4){
    lastadopt <- policy.max
  }
  else{
    lastadopt <- rbind(lastadopt, policy.max)
  }
}

#Create a two column dataframe with first and last adoption
totalmonths <- lastadopt-firstadopt
firstadopt <- as.data.frame(firstadopt)
row.names(firstadopt) <- NULL
names(firstadopt) <- "first"
lastadopt <- as.data.frame(lastadopt)
row.names(lastadopt) <- NULL
names(lastadopt) <- "last"
totalmonths <- as.data.frame(totalmonths)
row.names(totalmonths) <- NULL
names(totalmonths) <- "totalmonths"
adoptyears <- cbind(firstadopt, lastadopt, totalmonths)

##Calculate the total number of adopting states for each policy
for(i in sequence){
  policy <- data.frame(data[c(i)])
  policy.sum <- sum(complete.cases(policy))
  if(i==4){
    totalstate <- policy.sum
  }
  else{
    totalstate <- rbind(totalstate, policy.sum)
  }
}

totalstate <- as.data.frame(totalstate)
names(totalstate) <- "totalstates"


#estimating the coefficients

max <- length(finaldata)

sequence <- seq(4,max,3)

for(i in sequence){
  output <- survreg(Surv(time=finaldata[,i+2], event=finaldata[,i+1], type="right")~1, dist="weibull", data=finaldata)
  if(i==4){
    const <- as.data.frame(output$coefficients)
    logp <- as.data.frame(log(1/output$scale))
    se <- as.matrix(sqrt(diag(output$var)))
    se.const <- as.data.frame(se[1,1])
    se.logp <- as.data.frame(se[2,1])
    names(const) <- paste(colnames(finaldata[i]))
    names(logp) <- paste(colnames(finaldata[i]))
    names(se.const) <- paste(colnames(finaldata[i]))
    names(se.logp) <- paste(colnames(finaldata[i]))
    const.history <- const
    logp.history <- logp
    se.const.history <- se.const
    se.logp.history <- se.logp
  }
  else{
    const <- as.data.frame(output$coefficients)
    logp <- as.data.frame(log(1/output$scale))
    se <- as.matrix(sqrt(diag(output$var)))
    se.const <- as.data.frame(se[1,1])
    se.logp <- as.data.frame(se[2,1])
    names(const) <- paste(colnames(finaldata[i]))
    names(logp) <- paste(colnames(finaldata[i]))
    names(se.const) <- paste(colnames(finaldata[i]))
    names(se.logp) <- paste(colnames(finaldata[i]))
    const.history <- cbind(const.history, const)
    logp.history <- cbind(logp.history, logp)
    se.const.history <- cbind(se.const.history, se.const)
    se.logp.history <- cbind(se.logp.history, se.logp)
  }
}



const.history <- as.data.frame(t(const.history))
names(const.history) <- "weib.speed"
se.const.history <- as.data.frame(t(se.const.history))
names(se.const.history) <- "weib.speed.se"
logp.history <- as.data.frame(t(logp.history))
names(logp.history) <- "weib.scale"
se.logp.history <- as.data.frame(t(se.logp.history))
names(se.logp.history) <- "weib.scale.se"


#putting the output together
speedresults <- cbind(adoptyears, totalstate, const.history, se.const.history, logp.history, se.logp.history)
speedresults


#create column for standardized measure of weibspeed
##Rescale speed coefficient to range from zero to one
#First, subtract all speed values from 1 so they range from slowest to fastest
#(Note: "weib.speed" coefficients, before transformation, are akin to an average adoption rate. 
#Thus, larger numbers mean slower adoption.)

weib.rescale.speed <- 1-speedresults$weib.speed
weib.rescale.speed <- as.data.frame((weib.rescale.speed - min(weib.rescale.speed))/(max(weib.rescale.speed) - min(weib.rescale.speed)))
names(weib.rescale.speed) <- "weib.rescale.speed"

speedresultsr <- cbind(speedresults, weib.rescale.speed)

speedresultsr

#tidying up speedresultsr for displaying in the appendix (table 3 in the appendix)

speedresultapp <- speedresultsr

speedresultapp <- speedresultapp[order(weib.rescale.speed),]

names(speedresultapp)[names(speedresultapp) == 'first'] <- '1st Adopt'
names(speedresultapp)[names(speedresultapp) == 'last'] <- 'Last Adopt'
names(speedresultapp)[names(speedresultapp) == 'totalmonths'] <- 'Time'
names(speedresultapp)[names(speedresultapp) == 'totalstates'] <- '# States'
names(speedresultapp)[names(speedresultapp) == 'weib.speed'] <- 'Coef.'
names(speedresultapp)[names(speedresultapp) == 'weib.speed.se'] <- 'SE'
names(speedresultapp)[names(speedresultapp) == 'weib.rescale.speed'] <- 'Res. Coef.'

speedresultapp <- speedresultapp[-c(7,8)]

speedresultapp

setattr(speedresultapp, "row.names", c("Size of Warnings 80%", "Plain Packaging",
                                       "Restaurants & Pubs", "Misleading Packaging", 
                                       "Transport", "Physical Branding",
                                       "Pictures Required", "TV & Radio",
                                       "Size of Warnings 50%"))

xtable(speedresultapp) #as reported in table 3 in the appendix
#the row for "Single-Presentation" was added manually because the lack of diffusion for this policy prevented
#its estimation


#####FIGURES FOR SECTION 4.2#####

#reading in data
graphsmonths1973 <- read.csv("paper_graphsdata.csv")


#Figure 1 (Uruguay Plot)
sw80sp <- ggplot(graphsmonths1973, aes(X)) + geom_point(aes(y = transport, color = "Transport")) + 
  geom_point(aes(y = tvradio, color = "TV/Radio")) + 
  geom_point(aes(y = restaurantsandpubs, color = "Restaurants")) + 
  geom_point(aes(y = physicalbrand, color = "Physical Brand")) + 
  geom_point(aes(y = misleading, color = "Misleading")) +
  geom_point(aes(y = picrequired, color = "Picture Required")) +
  geom_point(aes(y = sizewarning50, color = "Size Warning > 50")) + 
  geom_point(aes(y = sizewarning80, color = "Size Warning > 80")) + 
  geom_point(aes(y = singlebrand, color = "Single Presentation")) +
  scale_color_manual(values = c("Transport" = "gray69", "TV/Radio" = "gray69", "Restaurants" = "gray69", 
                                "Physical Brand" = "gray69", "gray69" = "gray69", "Picture Required" = "gray69", 
                                "Size Warning > 50" = "gray69", "Size Warning > 80" = "gray4", "Single Presentation" = "gray4"))+
  geom_vline(xintercept = 444)+
  geom_vline(xintercept = 521)+
  geom_text(aes(x = 610, y = 90, label = "TV and Radio (0.87)", color = "TV/Radio", family = "Cambria"))+
  geom_text(aes(x = 630, y = 62, label = "Picture Required (0.85)", color = "Picture Required", family = "Cambria"))+
  geom_text(aes(x = 625, y = 65, label = "Physical Brands (0.74)", color = "Physical Brand", family = "Cambria"))+
  geom_text(aes(x = 610, y = 55, label = "Misleading (0.71)", color = "Misleading", family = "Cambria"))+
  geom_text(aes(x = 600, y = 59, label = "Transport (0.73)", color = "Transport", family = "Cambria"))+
  geom_text(aes(x = 610, y = 48, label = "Restaurants (0.65)", color = "Restaurants", family = "Cambria"))+
  geom_text(aes(x = 610, y = 32, label = "Size of \n Warnings 50% (1)", color = "Size Warning > 50", family = "Cambria"))+
  geom_text(aes(x = 610, y = 8, label = "Size of \n Warnings 80% (0)", color = "Size Warning > 80", family = "Cambria"))+
  geom_text(aes(x = 610, y = 2, label = "Single-Brand (NA)", color = "Single Presentation", family = "Cambria"))+
  geom_text(aes(x = 396, y = 115, label = "Claim filed \n Feb-10"), size = 4, family = "Cambria")+
  geom_text(aes(x = 600, y = 115, label = "Claim dismissed \n Jul-16"), size = 4, family = "Cambria")+
  xlab("") + ylab("Cumulative Number of Adopting Countries") + 
  scale_y_continuous(limits = c(0,120), expand = c(0,0), breaks = c(0,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95))+ 
  scale_x_continuous(limits = c(0,730), expand = c(0,0), breaks = c(0,155,275,395,526), 
                     labels=c("1973", "1986", "1996", "2006", "2016"))+
  ggtitle("") +  theme_bw() +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
        legend.position="none", axis.text = element_text(size=14), axis.title = element_text(size = 12),
        text = element_text(family = "Cambria", size = 14, color = "black"))


sw80sp

ggsave("figure1.eps", plot=sw80sp, width=6.5, height =9, units="in", dpi = 400)
ggsave("figure1.jpeg", plot=sw80sp, width=6.5, height =9, units="in", dpi = 400)

#Figure 2 (Australia Plot)

pp <- ggplot(graphsmonths1973, aes(X)) + geom_point(aes(y = transport, color = "Transport")) + 
  geom_point(aes(y = tvradio, color = "TV/Radio")) + 
  geom_point(aes(y = restaurantsandpubs, color = "Restaurants")) + 
  geom_point(aes(y = physicalbrand, color = "Physical Brand")) + 
  geom_point(aes(y = misleading, color = "Misleading")) +
  geom_point(aes(y = picrequired, color = "Picture Required")) +
  geom_point(aes(y = sizewarning50, color = "Size Warning > 50")) + 
  geom_point(aes(y = plainpack, color = "Plain Packaging")) +
  scale_color_manual(values = c("Transport" = "gray69", "TV/Radio" = "gray69", "Restaurants" = "gray69", 
                                "Physical Brand" = "gray69", "Misleading" = "gray69", "Picture Required" = "gray69", 
                                "Size Warning > 50" = "gray69", "Plain Packaging" = "gray4"))+
  geom_vline(xintercept = 465)+
  geom_vline(xintercept = 514)+
  geom_text(aes(x = 610, y = 90, label = "TV and Radio (0.87)",color = "TV/Radio", family = "Cambria"))+
  geom_text(aes(x = 630, y = 62, label = "Picture Required (0.85)", color = "Picture Required", family = "Cambria"))+
  geom_text(aes(x = 625, y = 65, label = "Physical Brands (0.74)", color = "Physical Brand", family = "Cambria"))+
  geom_text(aes(x = 603, y = 55, label = "Misleading (0.71)", color = "Misleading", family = "Cambria"))+
  geom_text(aes(x = 600, y = 59, label = "Transport (0.73)", color = "Transport", family = "Cambria"))+
  geom_text(aes(x = 610, y = 48, label = "Restaurants (0.65)", color = "Restaurants", family = "Cambria"))+
  geom_text(aes(x = 610, y = 32, label = "Size of \n Warnings 50% (1)", color = "Size Warning > 50", family = "Cambria"))+
  geom_text(aes(x = 588, y = 9, label = "Plain \n Packaging (0.49)", color = "Plain Packaging", family = "Cambria"))+
  geom_text(aes(x = 418, y = 115, label = "Claim filed \n Nov-11"), size = 4, family = "Cambria")+
  geom_text(aes(x = 592, y = 115, label = "Claim dismissed \n Dec-15"), size = 4, family = "Cambria")+
  xlab("") + ylab("Cumulative Number of Adopting Countries") + 
  scale_y_continuous(limits = c(0,120), expand = c(0,0), breaks = c(0,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95))+ 
  scale_x_continuous(limits = c(0,730), expand = c(0,0), 
                     breaks = c(0,155,275,395,526),  
                     labels=c("1973", "1986", "1996", "2006", "2016"))+
  ggtitle("") +  theme_bw() +
  theme(panel.border = element_blank(), panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
        legend.position="none", axis.text = element_text(size=14), axis.title = element_text(size = 12),
        text = element_text(family = "Cambria", size = 14, color = "black"))

pp

ggsave("figure2.eps", plot=pp, width=6.5, height =9, units="in", dpi = 400)
ggsave("figure2.jpeg", plot=pp, width=6.5, height =9, units="in", dpi = 400)



#####LOG-RANK TESTS#####

#log-rank tests as reported in table 1 in the paper, plus the robustness checks reported in tables 4 and 5 in the 
#appendix

#load data
merged.data.pooled <- read.csv("merged.data.pooled.new.csv")

#subset data between undisputed and disputed policies
undisputed1 <- subset(merged.data.pooled, merged.data.pooled$policydummy == "sizewarning50")
undisputed2 <- subset(merged.data.pooled, merged.data.pooled$policydummy == "picture")
undisputed3 <- subset(merged.data.pooled, merged.data.pooled$policydummy == "misleading")
undisputed4 <- subset(merged.data.pooled, merged.data.pooled$policydummy == "transport")
undisputed5 <- subset(merged.data.pooled, merged.data.pooled$policydummy == "restandpub")
undisputed6 <- subset(merged.data.pooled, merged.data.pooled$policydummy == "physicalbrand")
undisputed7 <- subset(merged.data.pooled, merged.data.pooled$policydummy == "tvradio")

undisputed <- rbind(undisputed1, undisputed2, undisputed3, undisputed4, undisputed5, undisputed6, undisputed7)

disputed1 <- subset(merged.data.pooled, merged.data.pooled$policydummy == "sizewarning80")
disputed2 <- subset(merged.data.pooled, merged.data.pooled$policydummy == "plainpack")

disputed <- rbind(disputed1,disputed2)

#make sure arbitration.agg.cat is a factor variable (categorical) in all subsets
merged.data.pooled$arbitration.agg.cat <- as.factor(merged.data.pooled$arbitration.agg.cat)
disputed$arbitration.agg.cat <- as.factor(disputed$arbitration.agg.cat)
undisputed$arbitration.agg.cat <- as.factor(undisputed$arbitration.agg.cat)

########LOG-RANK TESTS########

#Reported in Table 1 (Article)
#Null hypothesis: survival curves are identical.

#Compare the two disputed policies with the seven undisputed ones - first row in table 1 paper
merged.data.pooled$disputed <- ifelse(merged.data.pooled$disputed.aus == 1 | merged.data.pooled$disputed.uru == 1,1,0)

lrk1 <- survdiff(Surv(duration, adopt) ~ disputed, data = merged.data.pooled)
lrk1
#reject the null, p < 0.001

#Compare the disputed policy against URU with Size Warning 50 - most comparable policies - second row in table 1 paper
t1 <- subset(merged.data.pooled, merged.data.pooled$policydummy == "sizewarning80")
t2 <- subset(merged.data.pooled, merged.data.pooled$policydummy == "sizewarning50")

t3 <- rbind(t1,t2)

lrk2 <- survdiff(Surv(duration, adopt) ~ disputed.uru, data = t3)
lrk2
#reject the null, p < 0.001

#Compare the disputed policy against AUS with Size Warning 50 - most comparable policies - third row in table 1 paper
t4 <- subset(merged.data.pooled, merged.data.pooled$policydummy == "plainpack")
t5 <- rbind(t2,t4)

lrk3 <- survdiff(Surv(duration, adopt) ~ disputed.aus, data = t5)
lrk3
#reject the null, p <- 0.001

#Reported in Table 4 - Appendix 

#Compare the disputed policy against URU with the seven undisputed ones. It excludes
#the disputed policy against AUS. - first row in table 4 appendix
lrk4 <- survdiff(Surv(duration, adopt) ~ disputed.uru, data = merged.data.pooled, subset = policydummy != "plainpack")
lrk4
#reject the null, p < 0.001

#Compare the disputed policy against AUS with the seven undisputed policy. It excludes
#the disputed policy against URU. - second row in table 4 appendix
lrk5 <- survdiff(Surv(duration, adopt) ~ disputed.aus, data = merged.data.pooled, subset = policydummy != "sizewarning80")
lrk5
#reject the null, p < 0.001

#Reported in Table 5 - Appendix

#placebo test - SW80 e SW50, first row in table 5 appendix
lrk6 <- survdiff(Surv(duration, adopt) ~ policydummy, data = t3)
lrk6
#can't reject the null, p = 0.4. Policies are not inherently different from each other.

#placebo test - plain pack and SW50 - second row in table 5 appendix
lrk7 <- survdiff(Surv(duration, adopt) ~ policydummy, data = t5)
lrk7
#can't reject the null, p = 0.7. Policies are not inherently different from each other.

#further check - not reported in the paper - compare the policy disputed against AUS and the one disputed 
#against URU. They shouldn't differ, given that they have arbitration in common.

t6 <- rbind(t1,t4)
lrk8 <- survdiff(Surv(duration, adopt) ~ policydummy, data = t6)
lrk8
#can't reject the null, p = 0.3, confirms expectations


#Reported in Table 6 - Appendix

#more log-rank tests - the goal is to see if undisputed policies are all similar to each other and that is only 
#(or mainly) arbitration that could explain the differences between disputed and undisputed policies.

#sw50 vs picture
ta <- rbind(undisputed1, undisputed2)
lrk9 <- survdiff(Surv(duration, adopt) ~ policydummy, data = ta)
lrk9

#sw50 vs misleading
tb <- rbind(undisputed1, undisputed3)
lrk10 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tb)
lrk10

#sw50 vs transport
tc <- rbind(undisputed1, undisputed4)
lrk11 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tc)
lrk11

#sw50 vs restandpub
td <- rbind(undisputed1, undisputed5)
lrk12 <- survdiff(Surv(duration, adopt) ~ policydummy, data = td)
lrk12

#sw50 vs physicalbrand
te <- rbind(undisputed1, undisputed6)
lrk13 <- survdiff(Surv(duration, adopt) ~ policydummy, data = te)
lrk13

#sw50 vs tvradio
tf <- rbind(undisputed1, undisputed7)
lrk14 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tf)
lrk14

#sw50 is different from all other undisputed policies - the null is always rejected.
#However, it is not different from the disputed policies, as shown in the placebo tests.
#(Which makes sense, since sw50 is, qualitatively, the policy more similar to sw80 and plainpack). 
#What is interesting though is that is that when the treatment is arbitration, sw50 and plainpack and sw50 and sw80,
#are different, as reported above.

#picture vs all

#picture vs misleading
tg <- rbind(undisputed2, undisputed3)
lrk15 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tg)
lrk15

#picture vs transport
th <- rbind(undisputed2, undisputed4)
lrk16 <- survdiff(Surv(duration, adopt) ~ policydummy, data = th)
lrk16

#picture v restandpub
ti <- rbind(undisputed2, undisputed5)
lrk17 <- survdiff(Surv(duration, adopt) ~ policydummy, data = ti)
lrk17

#picture v physicalbrand
tj <- rbind(undisputed2, undisputed6)
lrk18 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tj)
lrk18

#picture v tvradio
tk <- rbind(undisputed2, undisputed7)
lrk19 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tk)
lrk19

#picture is different from all other undisputed policies, except relative to tvradio. 

#misleading vs all

#misleading v transport
tl <- rbind(undisputed3, undisputed4)
lrk20 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tl)
lrk20

#misleading v restandpub
tm <- rbind(undisputed3, undisputed5)
lrk21 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tm)
lrk21

#misleading v physicalbrand
tn <- rbind(undisputed3, undisputed6)
lrk22 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tn)
lrk22

#misleading v tvradio
to <- rbind(undisputed3, undisputed7)
lrk23 <- survdiff(Surv(duration, adopt) ~ policydummy, data = to)
lrk23

#misleading is similar to transport, restandpub and physicalbrand. Different from sw50, picture and tvradio.

#transport vs all

#transport v restandpub
tp <- rbind(undisputed4, undisputed5)
lrk24 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tp)
lrk24

#transport v. physicalbrand
tq <- rbind(undisputed4, undisputed6)
lrk25 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tq)
lrk25

#transport v. tvradio
tr <- rbind(undisputed4, undisputed7)
lrk26 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tr)
lrk26

#transport is similar to misleading, restandpub and physical brand. Different from tv/radio

#restpub vs all

#restandpub v physicalbrand
ts <- rbind(undisputed5, undisputed6)
lrk27 <- survdiff(Surv(duration, adopt) ~ policydummy, data = ts)
lrk27

#restandpub v tvradio
tt <- rbind(undisputed5, undisputed7)
lrk28 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tt)
lrk28

#restandpub is similar to misleading and transport, and physical brand. Different from tv/radio

#physical brands vs all

#physical brands v tv/radio
tu <- rbind(undisputed6, undisputed7)
lrk29 <- survdiff(Surv(duration, adopt) ~ policydummy, data = tu)
lrk28

#physical brand is similar to transport and restandapub, and restandpub
#tv radio is just similar to picture

########AFT MODELS########
#Appendix - section 5


#Table 7 in the appendix - undisputed policies - model with clustered std errors is the one reported
undisputed.model.cl <- survreg(Surv(time = undisputed$duration, event = undisputed$adopt, type = "right")
                                   ~ arbitration.agg.cat + OECDmem_MEM + fctc.inforce + disputedadopt + undisputedadopt +
                                     infant.mortality + polconv + v2dlunivl + fctc.members + cluster(country) + as.factor(policydummy), dist = "weibull",
                                   data = undisputed)

summary(undisputed.model.cl)


#without clustering

undisputed.model <- survreg(Surv(time = undisputed$duration, event = undisputed$adopt, type = "right")
                                ~ arbitration.agg.cat + OECDmem_MEM + fctc.inforce + disputedadopt + undisputedadopt +
                                  infant.mortality + polconv + v2dlunivl + fctc.members + as.factor(policydummy), dist = "weibull",
                                data = undisputed)

summary(undisputed.model) #results are robust

#Table 7 in the appendix - disputed policies - model with clustered std errors is the one reported

disputed.model.cl <- survreg(Surv(time = disputed$duration, event = disputed$adopt, type = "right")
                                 ~ arbitration.agg.cat + OECDmem_MEM + fctc.inforce + disputedadopt + undisputedadopt +
                                   infant.mortality + polconv + v2dlunivl + fctc.members + cluster(country) + as.factor(policydummy), dist = "weibull",
                                 data = disputed)

summary(disputed.model.cl)

#without clustering

disputed.model <- survreg(Surv(time = disputed$duration, event = disputed$adopt, type = "right")
                              ~ arbitration.agg.cat + OECDmem_MEM + fctc.inforce + disputedadopt + undisputedadopt +
                                infant.mortality + polconv + v2dlunivl + fctc.members  + as.factor(policydummy), dist = "weibull",
                              data = disputed)

summary(disputed.model) #results are robust


stargazer(undisputed.model.rob, disputed.model.rob)#standard errors and level of significance have been adjusted manually since stargazer doesn't deal well with clustered std errors.


#####APPENDIX - OTHER TESTS#####

#####SAMPLE AND POPULATION BALANCE#####
#as reported in table 2 in the appendix

#Reading in the dataset: population of countries from WDI (215 countries, including the sample of 91 countries used.
#Taiwan is excluded because the WDI does not have indicators for this jurisdiction)
balance <- read.csv("appendix_samplebalance.csv")

#Treating variables "sample" and "interested" as factors. Sample = 1 for countries in the sample, 0 otherwise.
#Interested = 1 for countries interested in implementing plain packaging, 0 otherwise.
balance$sample <- as.factor(balance$sample)
balance$interested <- as.factor(balance$interested)


#subsetting the sample used in the study
sample <- subset(balance, sample == 1)

#t-test for gdp per capita
gdppcsample <- as.vector(sample$gdppc)
gdppcpop <- as.vector(balance$gdppc)

t.test(gdppcsample, gdppcpop) #not significant


#data is heavily skewed
hist(gdppcsample)
hist(gdppcpop)

#so I employ a logarithmic transformation 
lngdppcsample <- log(gdppcsample)
hist(lngdppcsample)

lngdppcpop <- log(gdppcpop)
hist(lngdppcpop)

t.test(lngdppcsample, lngdppcpop) #not significant

#t-test for smoking among male population
smokingmalesample <- as.vector(sample$smokingmale)
smokingmalepop <- as.vector(balance$smokingmale)

t.test(smokingmalesample, smokingmalepop) #not significant

#data is not skewed
hist(smokingmalesample)
hist(smokingmalepop)

#t-test for smoking among female population
smokingfemalesample <- as.vector(sample$smokingfemale)
smokingfemalepop <- as.vector(balance$smokingfemale)

t.test(smokingfemalesample, smokingfemalepop) #significant at 0.001 - p-value:.0038 
#sample has fewer female smokers

#data is heavily skewed
hist(smokingfemalesample)
hist(smokingfemalepop)

#so I employ a logarithmic transformation
lnsmokingfemalesample <- log(smokingfemalesample)
lnsmokingfemalepop <- log(smokingfemalepop)

#data becomes less skewed, although not entirely
hist(lnsmokingfemalesample)
hist(lnsmokingfemalepop)

t.test(lnsmokingfemalesample, lnsmokingfemalepop) #significant at 0.05, p-value 0.028

#t-test for infant mortality
infantsample <- as.vector(sample$infantmortality)
infantpop <- as.vector(balance$infantmortality)

t.test(infantsample, infantpop) #not significant

#data is heavily skewed
hist(infantsample)
hist(infantpop)

#so I employ logarithmic transformation
lninfantsample <- log(infantsample)
lninfantpop <- log(infantpop)

#data becomes less skewed
hist(lninfantsample)
hist(lninfantpop)

t.test(lninfantsample,lninfantpop) #not significant

#t-test for school enrollment 
schoolmean <- as.vector(sample$school)
schoolpop <- as.vector(balance$school)

t.test(schoolmean, schoolpop) #not significant

#data is not skewed
hist(schoolmean)
hist(schoolpop)

#t-test for fdi/gdp
fdimean <- as.vector(sample$fdigdp)
fdipop <- as.vector(balance$fdigdp)

t.test(fdimean, fdipop) #significant - p-value 0.01344 - sample is less reliant on FDI than population

#data is heavily skewed
hist(fdimean)
hist(fdipop)

#####STANDARD ERRORS AND CONFIDENCE INTERVALS#####

#one plot per policy 
#as reported in figure 1 in the appendix

#TRANSPORT

output_transport <- flexsurvreg(Surv(time=transportdur, event=transportind, type="right")~1, dist="weibull", data=finaldata)
summary(output_transport)

output_transport_plot <- ggplot(data.frame(summary(output_transport)), aes(x = time)) + 
  geom_line(aes(y = est, col = "Transport"))+ 
  geom_line(aes(y = lcl))+
  geom_line(aes(y = ucl))+ 
  scale_colour_manual(name="", values=c("blue"))+
  labs(x = "Time (months)", y = "Survival Prob.", col = "Policy") + 
  theme_bw()

output_transport_plot

#RESTAURANT

output_restandpub <- flexsurvreg(Surv(time=restandpubdur, event=restandpubind, type="right")~1, dist="weibull", data=finaldata)
summary(output_restandpub)


output_restandpub_plot <- ggplot(data.frame(summary(output_restandpub)), aes(x = time)) + 
  geom_line(aes(y = est, col = "Restaurants & \n Pubs"))+ 
  geom_line(aes(y = lcl))+
  geom_line(aes(y = ucl))+ 
  scale_colour_manual(name="", values=c("blue"))+
  labs(x = "Time (months)", y = "Survival Prob.", col = "Policy") + 
  theme_bw()

output_restandpub_plot

#PHYSICAL BRAND

output_physbrand <- flexsurvreg(Surv(time=physicalbranddur, event=physicalbrandind, type="right")~1, dist="weibull", data=finaldata)
summary(output_physbrand)


output_physbrand_plot <- ggplot(data.frame(summary(output_physbrand)), aes(x = time)) + 
  geom_line(aes(y = est, col = "Physical\n Branding"))+ 
  geom_line(aes(y = lcl))+
  geom_line(aes(y = ucl))+ 
  scale_colour_manual(name="", values=c("blue"))+
  labs(x = "Time (months)", y = "Survival Prob.", col = "Policy") + 
  theme_bw()

output_physbrand_plot

#TVRADIO

output_tvradio <- flexsurvreg(Surv(time=tvradiodur, event=tvradioind, type="right")~1, dist="weibull", data=finaldata)
summary(output_tvradio)


output_tvradio_plot <- ggplot(data.frame(summary(output_tvradio)), aes(x = time)) + 
  geom_line(aes(y = est, col = "TV/Radio"))+ 
  geom_line(aes(y = lcl))+
  geom_line(aes(y = ucl))+ 
  scale_colour_manual(name="", values=c("blue"))+
  labs(x = "Time (months)", y = "Survival Prob.", col = "Policy") + 
  theme_bw()

output_tvradio_plot

#MISLEADING

output_misleading <- flexsurvreg(Surv(time=misleadingdur, event=misleadingind, type="right")~1, dist="weibull", data=finaldata)
summary(output_misleading)


output_misleading_plot <- ggplot(data.frame(summary(output_misleading)), aes(x = time)) + 
  geom_line(aes(y = est, col = "Misleading \n Packaging"))+ 
  geom_line(aes(y = lcl))+
  geom_line(aes(y = ucl))+ 
  scale_colour_manual(name="", values=c("blue"))+
  labs(x = "Time (months)", y = "Survival Prob.", col = "Policy") + 
  theme_bw()

output_misleading_plot

#PICTURE REQUIRED

output_picture <- flexsurvreg(Surv(time=picturedur, event=pictureind, type="right")~1, dist="weibull", data=finaldata)
summary(output_picture)


output_picture_plot <- ggplot(data.frame(summary(output_picture)), aes(x = time)) + 
  geom_line(aes(y = est, col = "Pictures \n Required"))+ 
  geom_line(aes(y = lcl))+
  geom_line(aes(y = ucl))+ 
  scale_colour_manual(name="", values=c("blue"))+
  labs(x = "Time (months)", y = "Survival Prob.", col = "Policy") + 
  theme_bw()

output_picture_plot

#SIZE WARNING #50

output_sizewarning50 <- flexsurvreg(Surv(time=sizewarning50dur, event=sizewarning50ind, type="right")~1, dist="weibull", data=finaldata)
summary(output_sizewarning50)


output_sizewarning50_plot <- ggplot(data.frame(summary(output_sizewarning50)), aes(x = time)) + 
  geom_line(aes(y = est, col = "Size Warning \n 50%"))+ 
  geom_line(aes(y = lcl))+
  geom_line(aes(y = ucl))+ 
  scale_colour_manual(name="", values=c("blue"))+
  labs(x = "Time (months)", y = "Survival Prob.", col = "Policy") + 
  theme_bw()

output_sizewarning50_plot

#PLAIN PACK

output_plainpack <- flexsurvreg(Surv(time=plainpackdur, event=plainpackind, type="right")~1, dist="weibull", data=finaldata)
summary(output_plainpack)


output_plainpack_plot <- ggplot(data.frame(summary(output_plainpack)), aes(x = time)) + 
  geom_line(aes(y = est, col = "Plain \n Packaging"))+ 
  geom_line(aes(y = lcl))+
  geom_line(aes(y = ucl))+ 
  scale_colour_manual(name="", values=c("blue"))+
  labs(x = "Time (months)", y = "Survival Prob.", col = "Policy") + 
  theme_bw()

output_plainpack_plot

#SIZE WARNING 80


output_sizewarning80 <- flexsurvreg(Surv(time=sizewarningdur, event=sizewarningind, type="right")~1, dist="weibull", data=finaldata)
summary(output_sizewarning80)

output_sizewarning80_plot <- ggplot(data.frame(summary(output_sizewarning80)), aes(x = time)) + 
  geom_line(aes(y = est, col = "Size Warning \n 80%"))+ 
  geom_line(aes(y = lcl))+
  geom_line(aes(y = ucl))+ 
  scale_colour_manual(name="", values=c("blue"))+
  labs(x = "Time (months)", y = "Survival Prob.", col = "Policy") + 
  theme_bw()

output_sizewarning80_plot


#to have all plots in one figure
securves <- grid.arrange(output_sizewarning80_plot+scale_colour_fivethirtyeight()+theme_minimal(), output_plainpack_plot+scale_colour_fivethirtyeight()+theme_minimal(),
                         output_sizewarning50_plot+scale_colour_fivethirtyeight()+theme_minimal(),output_tvradio_plot+scale_colour_fivethirtyeight()+theme_minimal(), 
                         output_picture_plot+scale_colour_fivethirtyeight()+theme_minimal(),output_physbrand_plot+scale_colour_fivethirtyeight()+theme_minimal(), 
                         output_transport_plot+scale_colour_fivethirtyeight()+theme_minimal(), 
                         output_misleading_plot+scale_colour_fivethirtyeight()+theme_minimal(), output_restandpub_plot+scale_colour_fivethirtyeight()+theme_minimal(),  
                         ncol = 3)

securves


ggsave("securves.pdf", plot=securves, width =10, height =4, units="in", dpi = 300)



#end of replication file
